library(tidyverse)
library(fixest)
library(readxl)
library(broom)
library(ggthemes)

#### Load in main analysis data and the EM-DAT data, and merge

## Main data
# source("code/01-importdata.R")

## EM-DAT data
emdat <- read_excel("data/emdat_public_2020_06_22_query_uid-P0QmXZ.xlsx", 
                    skip = 6) %>% 
  select(country = Country, 
         iso3c = ISO, 
         year = `Start Year`,
         disaster_subgroup = `Disaster Subgroup`,
         disaster_type = `Disaster Type`,
         total_deaths = `Total Deaths`,
         total_affected = `Total Affected`,
         number_injured = `No Injured`,
         number_affected = `No Affected`,
         number_homeless = `No Homeless`,
         costs_reconstruction = `Reconstruction Costs ('000 US$)`,
         costs_insured = `Insured Damages ('000 US$)`,
         costs_total = `Total Damages ('000 US$)`) %>% 
  filter(disaster_subgroup!="Extra-terrestrial",
         disaster_subgroup!="Biological",
         disaster_subgroup!="Geophysical")  %>%
  filter(year<2020) %>% 
  group_by(iso3c, year) %>% 
  mutate(disaster_count = row_number()) %>% 
  mutate(annual_total_disasters = as.numeric(max(disaster_count)), 
         annual_total_affected = sum(total_affected, na.rm=T),
         annual_total_deaths = sum(total_deaths, na.rm=T),
         annual_total_damages = sum(costs_total, na.rm=T)) %>% 
  ungroup() %>% 
  select(country, iso3c, year, starts_with("annual_")) %>% 
  distinct() %>% 
  arrange(iso3c, year) %>% 
  as.data.frame()

# Create empty panel because of missing years in EM-DAT  
emdat_iso <- unique(emdat$iso3c)
emdat_years <- seq(from = min(emdat$year, na.rm=T), 
                  to = max(emdat$year, na.rm=T),
                  by = 1)

# Combine into a panel
emdat_panel <- full_join(
  # Create an empty panel
  cbind.data.frame( 
    iso3c = rep(emdat_iso,  
                each = length(emdat_years)),
    year = rep(emdat_years,
               times = length(emdat_iso))),
  # Merge the EM-DAT data in
  emdat, by = c("iso3c", "year")) %>% 
  select(-country) %>% 
  mutate_at(vars(annual_total_disasters,
                 annual_total_affected,
                 annual_total_deaths,
                 annual_total_damages), ~ case_when(is.na(.) ~ 0,
                                                    !is.na(.) ~ .)) %>% 
  # Log transform
  mutate_at(vars(annual_total_affected, 
               annual_total_deaths, 
               annual_total_damages), ~log(1+.)) %>% 
  arrange(iso3c, year) %>% 
  group_by(iso3c) %>% 
  do(data.frame(., setNames(shift(.$annual_total_disasters, 1:3),
                            paste0('annual_total_disasters_lag', 1:3)))) %>% 
  do(data.frame(., setNames(shift(.$annual_total_affected, 1:3),
                            paste0('annual_total_affected_lag', 1:3)))) %>% 
  do(data.frame(., setNames(shift(.$annual_total_deaths, 1:3),
                            paste0('annual_total_deaths_lag', 1:3)))) %>% 
  do(data.frame(., setNames(shift(.$annual_total_damages, 1:3),
                            paste0('annual_total_damages_lag', 1:3)))) %>% 
  ungroup() %>% 
  arrange(iso3c, year) 


## Create spatial terms for natural disasters

## Load in contiguity data

# Raw contiguity
contdird_raw <- read_csv("data/contdird.csv")

# Tidy
contdird_df <- contdird_raw %>% 
  filter(year == max(year, na.rm=T)) %>%
  mutate(contiguous = ifelse(conttype>0, 1, 0)) %>% 
  mutate(iso3_state1 = countrycode::countrycode(.$state1no, origin = 'cown', destination = 'iso3c'),
         iso3_state2 = countrycode::countrycode(.$state2no, origin = 'cown', destination = 'iso3c')) %>% 
  select(iso3_state1, iso3_state2, contiguous)

# Extract states list from temperature data
states_list_analysis <- emdat_panel %>% 
  filter(!is.na(annual_total_disasters),
         year == 2005,
         iso3c!="TWN") %>% 
  select(iso3c) %>% 
  as.vector()

# Unique, sorted states list
states_list <- cbind.data.frame(iso3c = unique(contdird_df$iso3_state1))
states_list <- states_list %>% filter(iso3c %in% states_list_analysis$iso3c)
states_list <- sort(unique(states_list$iso3c))

# Make contiguity dyadic data
cont_df <- data.frame(iso3_state1 = rep(states_list, times = length(states_list)),
                      iso3_state2 = rep(states_list, each = length(states_list))) %>% 
  left_join(., contdird_df, by = c("iso3_state1", "iso3_state2")) %>% 
  mutate(contiguous = ifelse(is.na(contiguous), 0, contiguous)) %>% 
  arrange(iso3_state1, iso3_state2)
# cont_df %>% .[1:10,]

# Row-standardize and make a matrix
cont_matrix <- cont_df %>% 
  pivot_wider(names_from = iso3_state2, values_from = contiguous) %>% 
  column_to_rownames("iso3_state1") %>% 
  as.data.frame() %>% 
  mutate(row_total = rowSums(across(where(is.numeric)))) %>% 
  mutate_at(vars(AFG:ZWE), ~./row_total) %>% 
  select(-row_total) %>% 
  as.matrix()
diag(cont_matrix) <- 0
cont_matrix %>% .[1:10,1:10]

## Calculate each country's residual of natural disasters, for lags and measures
emdat_demeaned <- emdat_panel %>% 
  filter(iso3c %in% states_list,
         !is.na(annual_total_disasters),
         year >= 1990) %>% 
  # Annual count of disasters
  mutate(resid_count_lag1 = residuals(lm(annual_total_disasters_lag1 ~ 
                                            as.factor(iso3c) + 
                                            as.factor(year),
                                          data = .))) %>% 
  mutate(resid_count_lag2 = residuals(lm(annual_total_disasters_lag2 ~ 
                                            as.factor(iso3c) + 
                                            as.factor(year),
                                          data = .))) %>% 
  mutate(resid_count_lag3 = residuals(lm(annual_total_disasters_lag3 ~ 
                                            as.factor(iso3c) + 
                                            as.factor(year),
                                          data = .))) %>% 
  # # Annual count of affected persons
  mutate(resid_persons_lag1 = residuals(lm(annual_total_affected_lag1 ~
                                            as.factor(iso3c) +
                                            as.factor(year),
                                          data = .))) %>%
  mutate(resid_persons_lag2 = residuals(lm(annual_total_affected_lag2 ~
                                            as.factor(iso3c) +
                                            as.factor(year),
                                          data = .))) %>%
  mutate(resid_persons_lag3 = residuals(lm(annual_total_affected_lag3 ~
                                            as.factor(iso3c) +
                                            as.factor(year),
                                          data = .))) %>%
  select(iso3c, year, starts_with("resid")) %>% 
  arrange(iso3c)
# head(emdat_demeaned)

### Now make the spatial weights

# Make object for loops

# To hold loop outputs
spatial_emdat_annual_lag1 <-  as.data.frame(matrix(NA, nrow = 180, ncol = 30))
spatial_emdat_annual_lag2 <-  as.data.frame(matrix(NA, nrow = 180, ncol = 30))
spatial_emdat_annual_lag3 <-  as.data.frame(matrix(NA, nrow = 180, ncol = 30))
spatial_emdat_persons_lag1 <-  as.data.frame(matrix(NA, nrow = 180, ncol = 30))
spatial_emdat_persons_lag2 <-  as.data.frame(matrix(NA, nrow = 180, ncol = 30))
spatial_emdat_persons_lag3 <-  as.data.frame(matrix(NA, nrow = 180, ncol = 30))

# List of years to filter on and loop through
year_list <- unique(emdat_demeaned$year)

for (yearr in year_list) {
  residuals <- emdat_demeaned %>% 
    filter(year == yearr) 
  
  # Annual, lag 1
  hold_spatial_weight <- cont_matrix %*% residuals$resid_count_lag1
  spatial_emdat_annual_lag1[,yearr-1989] <- hold_spatial_weight
  # Annual, lag 2
  hold_spatial_weight <- cont_matrix %*% residuals$resid_count_lag2
  spatial_emdat_annual_lag2[,yearr-1989] <- hold_spatial_weight
  # Annual, lag 3
  hold_spatial_weight <- cont_matrix %*% residuals$resid_count_lag3
  spatial_emdat_annual_lag3[,yearr-1989] <- hold_spatial_weight
  # Season, lag 1
  hold_spatial_weight <- cont_matrix %*% residuals$resid_persons_lag1
  spatial_emdat_persons_lag1[,yearr-1989] <- hold_spatial_weight
  # Season, lag 2
  hold_spatial_weight <- cont_matrix %*% residuals$resid_persons_lag2
  spatial_emdat_persons_lag2[,yearr-1989] <- hold_spatial_weight
  # Annual, lag 3
  hold_spatial_weight <- cont_matrix %*% residuals$resid_persons_lag3
  spatial_emdat_persons_lag3[,yearr-1989] <- hold_spatial_weight
}

# Merge all the spatial weights together
spatial_emdat_df <- full_join(spatial_emdat_annual_lag1 %>% 
                                pivot_longer(names_to = "names", values_to = "annuald_W_lag1", V1:V30) %>% 
                                mutate(iso3c = rep(states_list, times = length(year_list)),
                                       year = rep(year_list, each = length(states_list))) %>% 
                                select(iso3c, year, 2),
                              spatial_emdat_annual_lag2 %>% 
                                pivot_longer(names_to = "names", values_to = "annuald_W_lag2", V1:V30) %>% 
                                mutate(iso3c = rep(states_list, times = length(year_list)),
                                       year = rep(year_list, each = length(states_list))) %>% 
                                  select(iso3c, year, 2),
                              by = c("iso3c", "year")) %>% 
  full_join(., spatial_emdat_annual_lag3 %>%
              pivot_longer(names_to = "names", values_to = "annuald_W_lag3", V1:V30) %>% 
              mutate(iso3c = rep(states_list, times = length(year_list)),
                     year = rep(year_list, each = length(states_list))) %>% 
              select(iso3c, year, 2),
            by = c("iso3c", "year")) %>% 
  full_join(., spatial_emdat_persons_lag1 %>% 
              pivot_longer(names_to = "names", values_to = "persons_W_lag1", V1:V30) %>% 
              mutate(iso3c = rep(states_list, times = length(year_list)),
                     year = rep(year_list, each = length(states_list))) %>% 
              select(iso3c, year, 2),
            by = c("iso3c", "year")) %>% 
  full_join(., spatial_emdat_persons_lag2 %>% 
              pivot_longer(names_to = "names", values_to = "persons_W_lag2", V1:V30) %>% 
              mutate(iso3c = rep(states_list, times = length(year_list)),
                     year = rep(year_list, each = length(states_list))) %>% 
              select(iso3c, year, 2),
            by = c("iso3c", "year")) %>% 
  full_join(., spatial_emdat_persons_lag3 %>% 
              pivot_longer(names_to = "names", values_to = "persons_W_lag3", V1:V30) %>% 
              mutate(iso3c = rep(states_list, times = length(year_list)),
                     year = rep(year_list, each = length(states_list))) %>% 
              select(iso3c, year, 2),
            by = c("iso3c", "year")) 


# Merge these spatial weights into the EMDAT panel
emdat_panel <- full_join(emdat_panel %>% 
                           select(iso3c, year, starts_with("annual_total_disasters"),
                                  starts_with("annual_total_affected")),
                         spatial_emdat_df, by = c("iso3c", "year"))


## Merge
analysis_df <- full_join(analysis_df, emdat_panel, by = c("iso3c", "year"))

#### Run baseline models as from temperature shocks ####

### Annual count of disasters, for three lags
m_emdat_annual1 <- feols(c(scaled_climate_laws, scaled_ecp_zeros,
                           scaled_elec_renew, scaled_fit, scaled_quota,
                           scaled_delegates, scaled_gcg,
                           scaled_cf_total_provided, scaled_cf_principal_provided,
                           scaled_cf_total_received, scaled_cf_principal_received)
                         ~ annual_total_disasters_lag1 + annuald_W_lag1 |
                           iso3c[year] + year, # trends
                         data = analysis_df) %>% 
  as.list()

m_emdat_annual2 <- feols(c(scaled_climate_laws, scaled_ecp_zeros,
                           scaled_elec_renew, scaled_fit, scaled_quota,
                           scaled_delegates, scaled_gcg,
                           scaled_cf_total_provided, scaled_cf_principal_provided,
                           scaled_cf_total_received, scaled_cf_principal_received)
                         ~ annual_total_disasters_lag2 + annuald_W_lag2 |
                           iso3c[year] + year, # trends
                         data = analysis_df) %>% 
  as.list()

m_emdat_annual3 <- feols(c(scaled_climate_laws, scaled_ecp_zeros,
                           scaled_elec_renew, scaled_fit, scaled_quota,
                           scaled_delegates, scaled_gcg,
                           scaled_cf_total_provided, scaled_cf_principal_provided,
                           scaled_cf_total_received, scaled_cf_principal_received)
                         ~ annual_total_disasters_lag3 + annuald_W_lag3 |
                           iso3c[year] + year, # trends
                         data = analysis_df) %>% 
  as.list()

### Annual number of persons affected in disasters, for three lags
m_emdat_persons1 <- feols(c(scaled_climate_laws, scaled_ecp_zeros,
                            scaled_elec_renew, scaled_fit, scaled_quota,
                            scaled_delegates, scaled_gcg,
                            scaled_cf_total_provided, scaled_cf_principal_provided,
                            scaled_cf_total_received, scaled_cf_principal_received)
                          ~ annual_total_affected_lag1 + persons_W_lag1 |
                            iso3c[year] + year, # trends
                          data = analysis_df) %>% 
  as.list()

m_emdat_persons2 <- feols(c(scaled_climate_laws, scaled_ecp_zeros,
                            scaled_elec_renew, scaled_fit, scaled_quota,
                            scaled_delegates, scaled_gcg,
                            scaled_cf_total_provided, scaled_cf_principal_provided,
                            scaled_cf_total_received, scaled_cf_principal_received)
                          ~ annual_total_affected_lag2 + persons_W_lag2 |
                            iso3c[year] + year, # trends
                          data = analysis_df) %>% 
  as.list()

m_emdat_persons3 <- feols(c(scaled_climate_laws, scaled_ecp_zeros,
                           scaled_elec_renew, scaled_fit, scaled_quota,
                           scaled_delegates, scaled_gcg,
                           scaled_cf_total_provided, scaled_cf_principal_provided,
                           scaled_cf_total_received, scaled_cf_principal_received)
                         ~ annual_total_affected_lag3 + persons_W_lag3 |
                           iso3c[year] + year, # trends
                         data = analysis_df) %>% 
  as.list()

emdat_list <- do.call(c, list(m_emdat_annual1, m_emdat_annual2, m_emdat_annual3,
                              m_emdat_persons1, m_emdat_persons2, m_emdat_persons3))

# Create a table to hold the regression outputs for all models
results_emdat <-  setNames(as.data.frame(matrix(NA, nrow = 66, ncol = 10)), 
                            c("outcome", "treatment", "beta", "std_error",
                              "t_stat", "p_value", "nobs", "n_countries",
                              "min_year", "max_year"))

# To remove the spatial term for the outputs: filter(!grepl("_W_",term))


for (i in 1:66){
  results_emdat[i, 1] <- emdat_list[[i]]$fml %>% gsub("\\~", "", .) %>% .[2] # outcome
  results_emdat[i, 2] <- emdat_list[[i]]$fml %>% gsub("\\~", "", .) %>% .[3] # treatment
  results_emdat[i, 3] <- emdat_list[[i]] %>% tidy() %>% filter(!grepl("_W_",term)) %>% select(2) # beta
  results_emdat[i, 4] <- emdat_list[[i]] %>% tidy() %>% filter(!grepl("_W_",term)) %>% select(3) # std error
  results_emdat[i, 5] <- emdat_list[[i]] %>% tidy() %>% filter(!grepl("_W_",term)) %>% select(4) # t-stat
  results_emdat[i, 6] <- emdat_list[[i]] %>% tidy() %>% filter(!grepl("_W_",term)) %>% select(5) # p-val
  results_emdat[i, 7] <- emdat_list[[i]]$nobs # number of observations
  results_emdat[i, 8] <- length(unique(emdat_list[[i]]$fixef_id$iso3c)) # number of countries
  results_emdat[i, 9] <- min(attr(emdat_list[[i]]$fixef_id$year, "fixef_names")) # start year of analysis
  results_emdat[i, 10] <- max(attr(emdat_list[[i]]$fixef_id$year, "fixef_names")) # end year of analysis
}
results_emdat$treatment <- str_sub(results_emdat$treatment, start = 1L, end = -17L)
# View(results_emdat)



#### Plot results from trends models

# Create a labels object
labels_vector <- results_emdat %>% 
  mutate(labels = case_when(outcome=="scaled_climate_laws" ~ "Climate laws",
                            outcome=="scaled_fit" ~ "Feed-in tariff",
                            outcome=="scaled_quota" ~ "Renewables quota",
                            outcome=="scaled_elec_renew" ~ "Renewable electricity\n generation",
                            outcome=="scaled_ecp_zeros" ~ "Emissions-weighted\n carbon price",
                            outcome=="scaled_delegates" ~ "COP delegates",
                            outcome=="scaled_gcg" ~ "Climate institutional\n memberships",
                            outcome=="scaled_cf_total_provided" ~ "Climate finance\n provided",
                            outcome=="scaled_cf_principal_provided" ~ "Climate finance\n (principal) provided",
                            outcome=="scaled_cf_total_received" ~ "Climate finance\n received",
                            outcome=="scaled_cf_principal_received" ~ "Climate finance\n (principal) received")) %>%
  select(labels)

## Plot temperature and lags in two facets
results_emdat %>% 
  # Create lag ID
  mutate(Disasters = case_when(str_detect(treatment, 'lag1')==TRUE ~ "One-year lag",
                                 str_detect(treatment, 'lag2')==TRUE ~ "Two-year lag",
                                 str_detect(treatment, 'lag3')==TRUE ~ "Three-year lag")) %>% 
  # Create facet ID
  mutate(Facet = case_when(str_detect(treatment, 'disasters')==TRUE ~ "Count of disasters",
                           str_detect(treatment, 'affected')==TRUE ~ "Persons affected")) %>% 
  # Create an index to organize results
  group_by(treatment) %>% 
  mutate(n = row_number()) %>% 
  ungroup() %>% 
  group_by(outcome, Facet) %>% 
  mutate(m = row_number()) %>% 
  ungroup() %>% 
  mutate(index = case_when(m == 1 ~ n - 0.2,
                           m == 2 ~ n - 0.0,
                           m == 3 ~ n + 0.2)) %>% 
  # Get confidence intervals
  mutate(ci_lower = beta - 1.96*std_error,
         ci_lower_correction = beta - 2.84*std_error,
         ci_upper = beta + 1.96*std_error,
         ci_upper_correction = beta + 2.84*std_error) %>%
  # Plot
  ggplot(., aes(beta, index, shape = Disasters)) +
  facet_grid(cols = vars(Facet), scales = "free") +
  geom_segment(aes(x = ci_lower_correction, xend = ci_upper_correction,
                   y = index, yend = index),
               size = 1, color = "#79b321") +
  geom_segment(aes(x = ci_lower, xend = ci_upper, 
                   y = index, yend = index), 
               size = 2, color = "#456613") +
  geom_point(size = 3.5, aes(shape = Disasters)) +
  scale_shape_manual(breaks=c("One-year lag","Two-year lag","Three-year lag"),
                     values = c(16,3,4)) +
  scale_color_manual(values = c("black", "black", "black")) +
  geom_vline(xintercept = 0, linetype = 1, colour = "black") +
  scale_y_continuous(trans = "reverse",
                     breaks = c(1:11),
                     labels = unique(labels_vector$labels)) +
  labs(x="Regression coefficient and confidence intervals", 
       y="") +
  geom_vline(xintercept = 0) +
  theme(panel.spacing = unit(1, "lines")) +
  theme_tufte(base_family = "Helvetica") +
  theme(legend.position = "bottom",
        panel.background = element_rect(fill=NA),
        panel.grid.major.x = element_line(colour = "grey90"),
        panel.grid.minor.x = element_line(colour = "grey90"),
        # panel.grid.major.y = element_line(colour = "grey90"),
        panel.ontop = FALSE,
        text=element_text(size=16))
ggsave("text/coefplot_emdat_lags.pdf", units = "in", height = 8, width = 10)


